In [1]:
import csv
import io

import requests

In [2]:
### Field names are taken from the list of available report fields: http://www.rcsb.org/pdb/results/reportField.do
### General information on the Custom Report web service: http://www.rcsb.org/pdb/software/wsreport.do
se = "ndbId"

# Columns requested for the main pull, joined into one comma-separated string.
string_names = ",".join([
    "classification",
    "experimentalTechnique",
    "macromoleculeType",
    "residueCount",
    "resolution",
    "structureMolecularWeight",
    "crystallizationMethod",
    "crystallizationTempK",
    "densityMatthews",
    "densityPercentSol",
    "pdbxDetails",
    "phValue",
    "publicationYear",
])

# Columns requested for the sequence pull.
sequences_string_names = ",".join(["sequence", "residueCount", "macromoleculeType"])

In [3]:
#Main Pull
# Ask the custom-report service for the columns in `string_names`, for every
# entry ('*'), returned as a CSV file.
# NOTE(review): this is RCSB's legacy REST endpoint -- confirm it is still served.
payload = {
    'pdbids': '*',
    'service': 'wsfile',
    'format': 'csv',
    'primaryOnly': '1',
    'CustomReportColumns': string_names,
}
r = requests.get('http://www.rcsb.org/pdb/rest/customReport', params=payload)
# Stop here on an HTTP error status; otherwise the error page would be parsed
# and written out as if it were CSV data by the cells that follow.
r.raise_for_status()

In [4]:
# Display the final request URL, including the encoded query string.
r.url


Out[4]:
'http://www.rcsb.org/pdb/rest/customReport?pdbids=%2A&service=wsfile&format=csv&primaryOnly=1&CustomReportColumns=classification%2CexperimentalTechnique%2CmacromoleculeType%2CresidueCount%2Cresolution%2CstructureMolecularWeight%2CcrystallizationMethod%2CcrystallizationTempK%2CdensityMatthews%2CdensityPercentSol%2CpdbxDetails%2CphValue%2CpublicationYear'

In [5]:
# Peek at the first line of the response body (expected to be the CSV header).
r.text.splitlines()[0]


Out[5]:
'structureId,classification,experimentalTechnique,macromoleculeType,residueCount,resolution,structureMolecularWeight,crystallizationMethod,crystallizationTempK,densityMatthews,densityPercentSol,pdbxDetails,phValue,publicationYear'

In [6]:
# List the requested column names one per element, for comparison with the header.
string_names.split(",")


Out[6]:
['classification',
 'experimentalTechnique',
 'macromoleculeType',
 'residueCount',
 'resolution',
 'structureMolecularWeight',
 'crystallizationMethod',
 'crystallizationTempK',
 'densityMatthews',
 'densityPercentSol',
 'pdbxDetails',
 'phValue',
 'publicationYear']

In [7]:
#writing the main pull
# Parse the response from an in-memory text stream opened with newline=''
# instead of r.text.splitlines(): splitlines() discards line terminators, so a
# quoted field that contains a line break would have its pieces glued together,
# and it also splits on characters (e.g. form feed) that do not end a CSV record.
output_reader = csv.reader(io.StringIO(r.text, newline=''))
# newline='' is what the csv module expects for its files (otherwise every row
# gets an extra '\r' on Windows); the explicit encoding keeps the output
# independent of the platform's default codec.
with open('pdb_data_no_dups.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerows(output_reader)

In [8]:
# Number of lines in the response text, header line included.
len(r.text.splitlines())


Out[8]:
135665

In [9]:
#sequence pull
# Same request as the main pull, but asking for the columns in
# `sequences_string_names`.
# NOTE(review): this is RCSB's legacy REST endpoint -- confirm it is still served.
payload_seq = {
    'pdbids': '*',
    'service': 'wsfile',
    'format': 'csv',
    'primaryOnly': '1',
    'CustomReportColumns': sequences_string_names,
}
r_seq = requests.get('http://www.rcsb.org/pdb/rest/customReport', params=payload_seq)
# Stop here on an HTTP error status; otherwise the error page would be parsed
# and written out as if it were CSV data by the next cell.
r_seq.raise_for_status()

In [10]:
#write sequence pull
# Parse the response from an in-memory text stream opened with newline=''
# instead of r_seq.text.splitlines(): splitlines() discards line terminators,
# so a quoted field that contains a line break would have its pieces glued
# together, and it also splits on characters that do not end a CSV record.
output_reader_seq = csv.reader(io.StringIO(r_seq.text, newline=''))
# newline='' is what the csv module expects for its files (otherwise every row
# gets an extra '\r' on Windows); the explicit encoding keeps the output
# independent of the platform's default codec.
with open('pdb_data_seq.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer_seq = csv.writer(csvfile)
    csv_writer_seq.writerows(output_reader_seq)

# Report names for the full pull, joined into one comma-separated string.
# Building the string with join() guarantees a separator between every pair of
# names; the earlier piecewise concatenation fused "RefinementDetails" with
# "refinementParameters" and "NmrRepresentative" with "NMRRefinement".
# NOTE(review): 'refinementParameters' is the only lower-camel-case name in the
# list -- confirm its spelling against the service's report list.
reports = ",".join([
    "StructureSummary",
    "Sequence",
    "Ligands",
    "BindingAffinity",
    "BiologicalDetails",
    "ClusterEntity",
    "Domains",
    "Crystallization",
    "UnitCellDimensions",
    "DataCollectionDetails",
    "RefinementDetails",
    "refinementParameters",
    "NmrSoftware",
    "NmrSpectrometer",
    "NMRExperimentalSampleConditions",
    "NmrRepresentative",
    "NMRRefinement",
    "NmrEnsemble",
    "EMStructure",
    "Citation",
    "OtherCitations",
    "SGProject",
])

# Full-report pull: request every report named in `reports` for all entries
# ('*') as CSV, then save the rows to pdb_data_all.csv.
# NOTE(review): this is RCSB's legacy REST endpoint -- confirm it is still served.
payload_all = {
    'pdbids': '*',
    'service': 'wsfile',
    'format': 'csv',
    'primaryOnly': '1',
    'reportName': reports,
}
r_all = requests.get('http://www.rcsb.org/pdb/rest/customReport', params=payload_all)
# Stop here on an HTTP error status rather than saving an error page as CSV.
r_all.raise_for_status()
# Parse from a newline='' text stream so line breaks inside quoted fields
# survive (splitlines() would discard them).
output_reader_all = csv.reader(io.StringIO(r_all.text, newline=''))
# Text mode is required: csv.writer emits str, so a file opened with 'wb'
# raises TypeError on Python 3. newline='' avoids doubled '\r' on Windows.
with open('pdb_data_all.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    # Iterate this pull's own reader; `output_reader` belongs to the main pull
    # and has already been consumed by the time this cell runs.
    csv_writer.writerows(output_reader_all)

# Display the final request URL built for the full-report pull.
r_all.url


In [ ]: